# Download the LAGOS-NE data into the folder returned by LAGOSNE:::lagos_path()
lagosne_get(
  dest_folder = LAGOSNE:::lagos_path()
)
## Warning in lagosne_get(dest_folder = LAGOSNE:::lagos_path()): LAGOSNE data for this version already exists on the local machine.
## Re-download if neccessary using the 'overwrite` argument.'
# Read the downloaded data into the `lagos` object; the tables used below
# (`locus`, `epi_nutr`) are pulled out of it by name
lagos <- lagosne_load()
## Warning in `_f`(version = version, fpath = fpath): LAGOSNE version
## unspecified, loading version: 1.087.3
# Lake centroid info lives in the `locus` table
lake_centers <- lagos$locus
# Convert the centroids to an sf object, reading the coordinates from the
# nhd_long / nhd_lat columns and tagging them with CRS 4326
spatial_lakes <- lake_centers %>%
  st_as_sf(
    coords = c("nhd_long", "nhd_lat"),
    crs = 4326
  )
# Water quality data (`epi_nutr` table)
nutr <- lagos$epi_nutr
# Look at column names
# names(nutr)
# Keep the lake id, the sample date and the chla / doc / secchi columns, then
# re-parse the sample date by passing its character form through ymd()
clarity_only <- nutr %>%
  select(lagoslakeid, sampledate, chla, doc, secchi) %>%
  mutate(sampledate = ymd(as.character(sampledate)))
# Look at the number of rows of the dataset
# nrow(clarity_only)
# Keep only the samples where both chla and secchi were recorded
chla_secchi <- filter(
  clarity_only,
  !(is.na(chla) | is.na(secchi))
)
# How many observations did we lose?
# nrow(clarity_only) - nrow(chla_secchi)
# Keep only the lakes with at least 200 observations of secchi and chla.
# The count is taken on `chla_secchi` (rows where both chla and secchi are
# present) rather than on `clarity_only`, so samples that are missing either
# measurement do not count toward the 200-observation threshold.
chla_secchi_200 <- chla_secchi %>%
  group_by(lagoslakeid) %>%
  # number of paired observations for this lake, repeated on each of its rows
  mutate(count = n()) %>%
  # ">=" so that a lake with exactly 200 observations is kept ("at least 200")
  filter(count >= 200) %>%
  # drop the grouping so later steps start from an ungrouped table
  ungroup()
# Look at unique sites
# length(unique(chla_secchi_200$lagoslakeid))
# Lake points for the retained lakes.
# chla_secchi_200 has one row per observation, so it is first reduced to one
# row per lake (distinct on lagoslakeid, keeping the other columns of the
# first row). inner_join then keeps only lakes present in both tables.
spatial_200 <- inner_join(
  spatial_lakes,
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  chla_secchi_200 %>% distinct(lagoslakeid, .keep_all = TRUE),
  by = "lagoslakeid"
)
# mapview(spatial_200)
### Take the mean chl_a and secchi by lake
# Result: one row per lagoslakeid with mean_chl, mean_secchi and
# log10_mean_chl.
mean_values_200 <- chla_secchi_200 %>%
  # Summarize by lake id
  group_by(lagoslakeid) %>%
  # Per-lake means, ignoring missing values
  # (TRUE rather than T: T is an ordinary variable that can be reassigned)
  summarize(
    mean_chl = mean(chla, na.rm = TRUE),
    mean_secchi = mean(secchi, na.rm = TRUE)
  ) %>%
  # A lake whose values are all missing gets a mean of NaN; is.na() is TRUE
  # for NaN, so those lakes are dropped here
  filter(
    !is.na(mean_chl),
    !is.na(mean_secchi)
  ) %>%
  # Take the log base 10 of the mean_chl
  mutate(log10_mean_chl = log10(mean_chl))
# Attach the per-lake means to the lake points; only lakes found in both
# tables are kept
mean_spatial <- spatial_lakes %>%
  inner_join(mean_values_200, by = "lagoslakeid")
# Map the lakes, coloured by log10 of mean chlorophyll a
mapview(mean_spatial, zcol = "log10_mean_chl")
##1 What is the relationship between chlorophyll a and Secchi disk depth for sites with at least 200 observations?
# Graph chlorophyll a (x axis) against Secchi disk depth (y axis).
# Columns are referenced by bare name inside aes() so ggplot looks them up in
# `data`; the axis labels match the variable actually mapped to each axis.
ggplot(data = chla_secchi_200, aes(x = chla, y = secchi)) +
  geom_point() +
  theme_few() +
  xlab("Chl A Conc") +
  ylab("Secchi Disk Depth")
## Warning: Removed 419421 rows containing missing values (geom_point).
# When clarity is low (i.e. shallow Secchi depth), chlorophyll a concentration is high. Secchi depth declines roughly exponentially as chlorophyll a concentration increases.
##2a Build a spatial dataset that includes the number of counts per site.
# Join datasets: attach the retained observations (including the per-lake
# `count` column) to the lake points
wq_spatial <- spatial_lakes %>%
  inner_join(chla_secchi_200, by = "lagoslakeid")
# State layer used for the spatial join
states <- us_states()
# Spatial join of the observation points with the state layer
wq_spatial_state <- wq_spatial %>%
  st_join(states)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
##2b Count the observations in each state and arrange that data from most to least total observations per state.
# Total number of observations per state, most to least.
# The result is a plain table with one row per state_name (columns:
# state_name, state_count), so the top states can be read off the first rows.
count_spatial_state <- wq_spatial_state %>%
  # geometry is not needed for a count; dropping it keeps summarize() from
  # combining the point geometries of every observation in a state
  st_drop_geometry() %>%
  group_by(state_name) %>%
  # one row per state instead of repeating the total on every observation
  summarize(state_count = n()) %>%
  arrange(desc(state_count))
# Top 3 States include: Minnesota, Wisconsin, and Michigan
##3 Is there a spatial pattern in Secchi disk depth for lakes with at least 200 observations?
# Make a map of mean Secchi depth
# Add the log base 10 of the per-lake mean Secchi depth
mean_values_200_secchi <- mutate(
  mean_values_200,
  log10_mean_secchi = log10(mean_secchi)
)
# Attach the per-lake values to the lake points
mean_spatial_2 <- spatial_lakes %>%
  inner_join(mean_values_200_secchi, by = "lagoslakeid")
# Map the lakes, coloured by log10 of mean Secchi depth
mapview(mean_spatial_2, zcol = "log10_mean_secchi")
# It appears that Secchi disk depth follows the same spatial pattern as Chl A concentration. Depths are greater on the East Coast than in the Great Lakes region.